{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function \n",
    "from itertools import count \n",
    "from collections import defaultdict\n",
    "from scipy.sparse import csr    \n",
    "\n",
    "def vectorize_dic(dic, ix=None, p=None):\n",
    "    #     Creates a scipy csr matrix from a list of lists (each inner list is a set of values corresponding to a feature) \n",
    "\n",
    "    #     parameters:\n",
    "    #     -----------\n",
    "    #     dic -- dictionary of feature lists. Keys are the name of features\n",
    "    #     ix -- index generator (default None)\n",
    "    #     p -- dimension of featrure space (number of columns in the sparse matrix) (default None)\n",
    "    if (ix == None):\n",
    "        d = count(0)\n",
    "        ix = defaultdict(lambda: next(d)) # len2623 user 943 item 1680\n",
    "\n",
    "    n = len(list(dic.values())[0]) # num samples 90570\n",
    "    g = len(list(dic.keys())) # num groups 2\n",
    "    nz = n * g # number of non-zeros 181140\n",
    "\n",
    "    col_ix = np.empty(nz, dtype=int)     #181140\n",
    "\n",
    "    i = 0\n",
    "    for k, lis in dic.items():     \n",
    "        # append index el with k in order to prevet mapping different columns with same id to same index\n",
    "        # 遍历dic中两个key的value，通过ix得到user和item对应的在一行中的下标0-2622 [1682items', 2622]\n",
    "        # 最后得到的矩阵中，一行只有2个值为1，分别为0-942中的user和943-2622中item对应的列\n",
    "        # col_ix中相邻两值为 同行中user和item分别对应为1的下标\n",
    "        col_ix[i::g] = [ix[str(el) + str(k)] for el in lis]\n",
    "        i += 1\n",
    "\n",
    "    row_ix = np.repeat(np.arange(0, n), g)      \n",
    "    data = np.ones(nz) # 全1数组，用来给每行2个非0元素赋值\n",
    "\n",
    "    if (p == None):\n",
    "        p = len(ix)  # p = 2623\n",
    "\n",
    "    ixx = np.where(col_ix < p) # ixx [     0,      1,      2, ..., 181137, 181138, 181139]\n",
    "    \n",
    "    # row_ix  [    0,     0,     1, ..., 90568, 90569, 90569]\n",
    "    # col_ix  [   0,  943,    0, ..., 2154,  942, 2265]\n",
    "    return csr.csr_matrix((data[ixx],(row_ix[ixx], col_ix[ixx])), shape=(n, p)), ix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.feature_extraction import DictVectorizer\n",
    "\n",
    "# laod data with pandas\n",
    "cols = ['user', 'item', 'rating', 'timestamp']\n",
    "train = pd.read_csv('./input/ua.base', delimiter='\\t', names=cols)\n",
    "test = pd.read_csv('./input/ua.test', delimiter='\\t', names=cols)\n",
    "\n",
    "# vectorize data and convert them to csr matrix\n",
    "X_train, ix = vectorize_dic({'users': train.user.values, 'items': train.item.values})\n",
    "# 不传ix 和 X_train.shape[1]的话，生成矩阵维数不一致\n",
    "X_test, ix = vectorize_dic({'users': test.user.values, 'items': test.item.values}, ix, X_train.shape[1])\n",
    "y_train = train.rating.values\n",
    "y_test= test.rating.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(90570, 4) (9430, 4)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>item</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>874965758</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>876893171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>878542960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>876893119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>889751712</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user  item  rating  timestamp\n",
       "0     1     1       5  874965758\n",
       "1     1     2       3  876893171\n",
       "2     1     3       4  878542960\n",
       "3     1     4       3  876893119\n",
       "4     1     5       3  889751712"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print (train.shape , test.shape)\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(90570, 2623)\n",
      "(9430, 2623)\n"
     ]
    }
   ],
   "source": [
    "X_train = X_train.todense()\n",
    "X_test = X_test.todense()\n",
    "\n",
    "# print shape of data\n",
    "print(X_train.shape)\n",
    "print(X_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define FM Model with tensorflow\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n"
     ]
    }
   ],
   "source": [
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "n, p = X_train.shape\n",
    "\n",
    "# number of latent factors\n",
    "k = 10\n",
    "\n",
    "# design matrix\n",
    "X = tf.placeholder('float', shape=[None, p])\n",
    "# target vector\n",
    "y = tf.placeholder('float', shape=[None, 1])\n",
    "\n",
    "# bias and weights\n",
    "w0 = tf.Variable(tf.zeros([1]))\n",
    "W = tf.Variable(tf.zeros([p]))\n",
    "\n",
    "# interaction factors, randomly initialized \n",
    "V = tf.Variable(tf.random_normal([k, p], stddev=0.01))\n",
    "\n",
    "# estimate of y, initialized to 0.\n",
    "y_hat = tf.Variable(tf.zeros([n, 1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Now we define how the output values y should be calculated\n",
    "Using the trick in Rendle's paper, the output of a give feature vector x can be calculated using the following equation. The next cell implements that with tensorflow operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$\\displaystyle \\hat{y}(\\mathbf{x}) = w_0 + \\sum_{j=1}^{p}w_jx_j + \\frac{1}{2} \\sum_{f=1}^{k} ((\\sum_{j=1}^{p}v_{j,f}x_j)^2-\\sum_{j=1}^{p}v_{j,f}^2 x_j^2)$"
      ],
      "text/plain": [
       "<IPython.core.display.Math object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display, Math, Latex\n",
    "display(Math(r'\\hat{y}(\\mathbf{x}) = w_0 + \\sum_{j=1}^{p}w_jx_j + \\frac{1}{2} \\sum_{f=1}^{k} ((\\sum_{j=1}^{p}v_{j,f}x_j)^2-\\sum_{j=1}^{p}v_{j,f}^2 x_j^2)'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From <ipython-input-17-7b11ec4227d9>:2: calling reduce_sum_v1 (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "keep_dims is deprecated, use keepdims instead\n"
     ]
    }
   ],
   "source": [
    "# Calculate output with FM equation\n",
    "linear_terms = tf.add(w0, tf.reduce_sum(tf.multiply(W, X), 1, keep_dims=True))\n",
    "pair_interactions = (tf.multiply(0.5,\n",
    "                    tf.reduce_sum(\n",
    "                        tf.subtract(\n",
    "                            tf.pow( tf.matmul(X, tf.transpose(V)), 2),\n",
    "                            tf.matmul(tf.pow(X, 2), tf.transpose(tf.pow(V, 2)))),\n",
    "                        1, keep_dims=True)))\n",
    "y_hat = tf.add(linear_terms, pair_interactions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loss function\n",
    "Here we implement FM point-wise loss function with tensorflow operations. The loss is defined as:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$\\displaystyle L = \\sum_{i=1}^{n} (y_i - \\hat{y}_i)^2 + \\lambda_w ||W||^2 + \\lambda_v ||V||^2$"
      ],
      "text/plain": [
       "<IPython.core.display.Math object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Math(r'L = \\sum_{i=1}^{n} (y_i - \\hat{y}_i)^2 + \\lambda_w ||W||^2 + \\lambda_v ||V||^2'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# L2 regularized sum of squares loss function over W and V\n",
    "lambda_w = tf.constant(0.001, name='lambda_w')\n",
    "lambda_v = tf.constant(0.001, name='lambda_v')\n",
    "\n",
    "l2_norm = (tf.reduce_sum(\n",
    "            tf.add(\n",
    "                tf.multiply(lambda_w, tf.pow(W, 2)),\n",
    "                tf.multiply(lambda_v, tf.pow(V, 2)))))\n",
    "\n",
    "error = tf.reduce_mean(tf.square(tf.subtract(y, y_hat)))\n",
    "loss = tf.add(error, l2_norm)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Optimization\n",
    "Given a loss function, tensorflow can automatically calculate the derivatives of the loss function and find the optimal values for the 'variables' of the loss function. Under the hood, gradient descent optimizer update model parameters iteratively with the following update rule:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$\\displaystyle \\Theta_{i+1} = \\Theta_{i} - \\eta \\frac{\\delta L}{\\delta \\Theta}$"
      ],
      "text/plain": [
       "<IPython.core.display.Math object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Math(r'\\Theta_{i+1} = \\Theta_{i} - \\eta \\frac{\\delta L}{\\delta \\Theta}'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.cast instead.\n"
     ]
    }
   ],
   "source": [
    "optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Preparing Batches\n",
    "With SGD or Adam optimization methods, you can update model parameters in mini-batches. The larges the size of mini-batches are, the faster is the optimization method but it can become harder to find the optimized parameters. Size of mini-batches is a trade-off between accuracy and complexity. Here we implement a method to generate mini-batches from the input data. Check this great weblog for an overview of gradient descent optimization methods."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def batcher(X_, y_=None, batch_size=-1):\n",
    "    n_samples = X_.shape[0]\n",
    "\n",
    "    if batch_size == -1:\n",
    "        batch_size = n_samples\n",
    "    if batch_size < 1:\n",
    "        raise ValueError('Parameter batch_size={} is unsupported'.format(batch_size))\n",
    "\n",
    "    for i in range(0, n_samples, batch_size):\n",
    "        upper_bound = min(i + batch_size, n_samples)\n",
    "        ret_x = X_[i:upper_bound]\n",
    "        ret_y = None\n",
    "        if y_ is not None:\n",
    "            ret_y = y_[i:i + batch_size]\n",
    "            yield (ret_x, ret_y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Lanching tensorflow graph and training the model\n",
    "Finally we can strat the tensorflow session to initialize the variabeles and optimize the model prameters. Training consists of running the optimizer operation and feeding the mini-batches to the optimizer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm_notebook as tqdm\n",
    "\n",
    "epochs = 10\n",
    "batch_size = 1000\n",
    "\n",
    "# Launch the graph\n",
    "init = tf.global_variables_initializer()\n",
    "sess = tf.Session()\n",
    "\n",
    "sess.run(init)\n",
    "\n",
    "for epoch in range(epochs):\n",
    "    perm = np.random.permutation(X_train.shape[0])\n",
    "    # iterate over batches\n",
    "    for bX, bY in batcher(X_train[perm], y_train[perm], batch_size):\n",
    "        sess.run(optimizer, feed_dict={X: bX.reshape(-1, p), y: bY.reshape(-1, 1)})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluating the model\n",
    "We can now evaluate the trained model on out test set. We use RMSE to measure the error of predictions. Note that here we need to run the error operation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.1136427\n"
     ]
    }
   ],
   "source": [
    "errors = []\n",
    "for bX, bY in batcher(X_test, y_test):\n",
    "    errors.append(sess.run(error, feed_dict={X: bX.reshape(-1, p), y: bY.reshape(-1, 1)}))\n",
    "\n",
    "RMSE = np.sqrt(np.array(errors).mean())\n",
    "print(RMSE)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
